In [70]:
import matplotlib as plt
import pandas as pd
import seaborn as sns
In [71]:
tips = sns.load_dataset("tips")
In [72]:
tips
Out[72]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [4]:
penguins = sns.load_dataset("penguins")
In [5]:
penguins
Out[5]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
3 Adelie Torgersen NaN NaN NaN NaN NaN
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 Female
... ... ... ... ... ... ... ...
339 Gentoo Biscoe NaN NaN NaN NaN NaN
340 Gentoo Biscoe 46.8 14.3 215.0 4850.0 Female
341 Gentoo Biscoe 50.4 15.7 222.0 5750.0 Male
342 Gentoo Biscoe 45.2 14.8 212.0 5200.0 Female
343 Gentoo Biscoe 49.9 16.1 213.0 5400.0 Male

344 rows × 7 columns

In [11]:
tips
Out[11]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [13]:
sns.set_theme()
In [14]:
sns.scatterplot(data=tips, x="total_bill", y="tip")
Out[14]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [15]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="smoker")
Out[15]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [56]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex")
Out[56]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [57]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="day")
Out[57]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [18]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex",style="time")
Out[18]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [19]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex",style="sex")
Out[19]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [58]:
sns.scatterplot(data=tips, x="total_bill", y="tip", hue="sex",size="size")
Out[58]:
<Axes: xlabel='total_bill', ylabel='tip'>
In [24]:
flights = sns.load_dataset("flights")
In [25]:
flights
Out[25]:
year month passengers
0 1949 Jan 112
1 1949 Feb 118
2 1949 Mar 132
3 1949 Apr 129
4 1949 May 121
... ... ... ...
139 1960 Aug 606
140 1960 Sep 508
141 1960 Oct 461
142 1960 Nov 390
143 1960 Dec 432

144 rows × 3 columns

In [27]:
# Here the plot is plotting the average passengers  
sns.lineplot(data=flights, x="year", y="passengers")
Out[27]:
<Axes: xlabel='year', ylabel='passengers'>
In [28]:
flights[flights['year'] == 1950]
Out[28]:
year month passengers
12 1950 Jan 115
13 1950 Feb 126
14 1950 Mar 141
15 1950 Apr 135
16 1950 May 125
17 1950 Jun 149
18 1950 Jul 170
19 1950 Aug 170
20 1950 Sep 158
21 1950 Oct 133
22 1950 Nov 114
23 1950 Dec 140
In [30]:
flights.passengers.sum()
Out[30]:
40363
In [31]:
#   estimator plots the sum of passengers
    sns.lineplot(data=flights, x="year", y="passengers",estimator=sum)
Out[31]:
<Axes: xlabel='year', ylabel='passengers'>
In [32]:
 sns.lineplot(data=flights, x="year", y="passengers",estimator=max)
Out[32]:
<Axes: xlabel='year', ylabel='passengers'>
In [33]:
sns.lineplot(data=flights,x="year",y="passengers",hue="month")
Out[33]:
<Axes: xlabel='year', ylabel='passengers'>
In [2]:
trips = sns.load_dataset("taxis",parse_dates=["pickup","dropoff"])
In [6]:
trips
Out[6]:
pickup dropoff passengers distance fare tip tolls total color payment pickup_zone dropoff_zone pickup_borough dropoff_borough
0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 2.15 0.0 12.95 yellow credit card Lenox Hill West UN/Turtle Bay South Manhattan Manhattan
1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 0.00 0.0 9.30 yellow cash Upper West Side South Upper West Side South Manhattan Manhattan
2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 2.36 0.0 14.16 yellow credit card Alphabet City West Village Manhattan Manhattan
3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 6.15 0.0 36.95 yellow credit card Hudson Sq Yorkville West Manhattan Manhattan
4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 1.10 0.0 13.40 yellow credit card Midtown East Yorkville West Manhattan Manhattan
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6428 2019-03-31 09:51:53 2019-03-31 09:55:27 1 0.75 4.5 1.06 0.0 6.36 green credit card East Harlem North Central Harlem North Manhattan Manhattan
6429 2019-03-31 17:38:00 2019-03-31 18:34:23 1 18.74 58.0 0.00 0.0 58.80 green credit card Jamaica East Concourse/Concourse Village Queens Bronx
6430 2019-03-23 22:55:18 2019-03-23 23:14:25 1 4.14 16.0 0.00 0.0 17.30 green cash Crown Heights North Bushwick North Brooklyn Brooklyn
6431 2019-03-04 10:09:25 2019-03-04 10:14:29 1 1.12 6.0 0.00 0.0 6.80 green credit card East New York East Flatbush/Remsen Village Brooklyn Brooklyn
6432 2019-03-13 19:31:22 2019-03-13 19:48:02 1 3.85 15.0 3.36 0.0 20.16 green credit card Boerum Hill Windsor Terrace Brooklyn Brooklyn

6433 rows × 14 columns

In [38]:
trips['hour'] = trips['pickup'].dt.hour
In [41]:
trips[['hour']]
Out[41]:
hour
0 20
1 16
2 17
3 1
4 13
... ...
6428 9
6429 17
6430 22
6431 10
6432 19

6433 rows × 1 columns

In [49]:
#  plotting mean payment done in cash or credit card
sns.lineplot(data=trips,x='hour',y='total',hue="payment",style="color",ci=None)
C:\Users\SUDIPT PC\AppData\Local\Temp\ipykernel_32480\2583105076.py:2: FutureWarning: 

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=trips,x='hour',y='total',hue="payment",style="color",ci=None)
Out[49]:
<Axes: xlabel='hour', ylabel='total'>
In [50]:
sns.lineplot(data=trips,x='hour',y='total',hue="payment",style="color",size="passengers",ci=None)
C:\Users\SUDIPT PC\AppData\Local\Temp\ipykernel_32480\2028404596.py:1: FutureWarning: 

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.lineplot(data=trips,x='hour',y='total',hue="payment",style="color",size="passengers",ci=None)
Out[50]:
<Axes: xlabel='hour', ylabel='total'>
In [60]:
sns.relplot(data=tips, x="total_bill", y="tip", col="sex")
Out[60]:
<seaborn.axisgrid.FacetGrid at 0x2883700d360>
In [61]:
sns.relplot(data=tips, x="total_bill", y="tip", hue="smoker", col="sex")
Out[61]:
<seaborn.axisgrid.FacetGrid at 0x28837269840>
In [62]:
sns.relplot(data=tips, x="total_bill", y="tip", hue="smoker", col="sex", row="time")
Out[62]:
<seaborn.axisgrid.FacetGrid at 0x288370b9db0>
In [68]:
sns.relplot(data=trips, x="hour",y="total",kind="line",hue="payment")
Out[68]:
<seaborn.axisgrid.FacetGrid at 0x2883c2bf490>
In [67]:
sns.relplot(data=trips, x="hour",
            y="total",kind="line",
            col="pickup_borough",
            hue="payment")
Out[67]:
<seaborn.axisgrid.FacetGrid at 0x2883c1c4730>
In [66]:
sns.relplot(data=trips, x="hour",
            y="total",kind="line",
            col="pickup_borough",
            hue="payment",row="dropoff_borough")
Out[66]:
<seaborn.axisgrid.FacetGrid at 0x28837917a30>
In [79]:
sns.relplot(data=trips, x="hour",
            y="total",kind="line",
            col="pickup_borough",
            hue="payment",height=4)
Out[79]:
<seaborn.axisgrid.FacetGrid at 0x28843f91bd0>
In [73]:
sns.relplot(data=tips, x="total_bill", y="tip", hue="smoker",col="sex",row="time",height=5,aspect=1.5)
Out[73]:
<seaborn.axisgrid.FacetGrid at 0x1433e353be0>
In [86]:
sns.histplot(data=tips,x="tip")
Out[86]:
<Axes: xlabel='tip', ylabel='Count'>
In [87]:
sns.histplot(data=tips,x="tip",hue="time")
Out[87]:
<Axes: xlabel='tip', ylabel='Count'>
In [92]:
sns.histplot(data=tips,x="tip",hue="smoker",multiple="stack")
Out[92]:
<Axes: xlabel='tip', ylabel='Count'>
In [93]:
sns.histplot(data=tips,x="tip",hue="smoker",multiple="dodge")
Out[93]:
<Axes: xlabel='tip', ylabel='Count'>
In [102]:
sns.histplot(data=penguins,x="body_mass_g",bins=30,binwidth=100,hue="species",multiple="stack")
Out[102]:
<Axes: xlabel='body_mass_g', ylabel='Count'>
In [103]:
sns.histplot(data=penguins,x="body_mass_g",bins=30,binwidth=100,hue="species",multiple="stack",element="step")
Out[103]:
<Axes: xlabel='body_mass_g', ylabel='Count'>
In [104]:
sns.histplot(data=penguins,x="body_mass_g",bins=30,binwidth=100,hue="species",multiple="stack",element="step",kde=True)
Out[104]:
<Axes: xlabel='body_mass_g', ylabel='Count'>
In [107]:
sns.kdeplot(data=penguins,x="body_mass_g",hue="species")
Out[107]:
<Axes: xlabel='body_mass_g', ylabel='Density'>
In [108]:
sns.kdeplot(data=penguins,x="body_mass_g",hue="species",bw_adjust=0.2)
Out[108]:
<Axes: xlabel='body_mass_g', ylabel='Density'>
In [109]:
sns.kdeplot(data=penguins,x="body_mass_g",hue="species",multiple="stack")
Out[109]:
<Axes: xlabel='body_mass_g', ylabel='Density'>
In [110]:
sns.histplot(data=penguins,x="body_mass_g")
Out[110]:
<Axes: xlabel='body_mass_g', ylabel='Count'>
In [112]:
sns.histplot(data=penguins,x="flipper_length_mm")
Out[112]:
<Axes: xlabel='flipper_length_mm', ylabel='Count'>
In [113]:
sns.histplot(data=penguins,x="body_mass_g",y="flipper_length_mm")
Out[113]:
<Axes: xlabel='body_mass_g', ylabel='flipper_length_mm'>
In [114]:
sns.kdeplot(data=penguins,x="body_mass_g",y="flipper_length_mm")
Out[114]:
<Axes: xlabel='body_mass_g', ylabel='flipper_length_mm'>
In [115]:
sns.kdeplot(data=penguins,x="body_mass_g",y="flipper_length_mm",hue="species")
Out[115]:
<Axes: xlabel='body_mass_g', ylabel='flipper_length_mm'>
In [116]:
sns.kdeplot(data=penguins,x="bill_length_mm",y="flipper_length_mm",hue="species")
Out[116]:
<Axes: xlabel='bill_length_mm', ylabel='flipper_length_mm'>
In [117]:
sns.histplot(data=penguins,x="bill_length_mm",y="flipper_length_mm",hue="species")
Out[117]:
<Axes: xlabel='bill_length_mm', ylabel='flipper_length_mm'>

Rugplots¶

In [120]:
sns.rugplot(data=tips,x="tip",height=0.5)
Out[120]:
<Axes: xlabel='tip'>
In [122]:
sns.rugplot(data=tips,y="tip",height=0.8)
Out[122]:
<Axes: ylabel='tip'>
In [126]:
sns.kdeplot(data=tips,x="total_bill")
sns.rugplot(data=tips,x="total_bill",height=0.05)
Out[126]:
<Axes: xlabel='total_bill', ylabel='Density'>
In [69]:
sns.scatterplot(data=tips,x="total_bill",y="tip")
sns.rugplot(data=tips,x="total_bill",y="tip")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[69], line 1
----> 1 sns.scatterplot(data=tips,x="total_bill",y="tip")
      2 sns.rugplot(data=tips,x="total_bill",y="tip")

NameError: name 'tips' is not defined
In [133]:
penguins
Out[133]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
3 Adelie Torgersen NaN NaN NaN NaN NaN
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 Female
... ... ... ... ... ... ... ...
339 Gentoo Biscoe NaN NaN NaN NaN NaN
340 Gentoo Biscoe 46.8 14.3 215.0 4850.0 Female
341 Gentoo Biscoe 50.4 15.7 222.0 5750.0 Male
342 Gentoo Biscoe 45.2 14.8 212.0 5200.0 Female
343 Gentoo Biscoe 49.9 16.1 213.0 5400.0 Male

344 rows × 7 columns

In [134]:
sns.ecdfplot(data=penguins,x="body_mass_g",hue="species")
Out[134]:
<Axes: xlabel='body_mass_g', ylabel='Proportion'>
In [155]:
sns.displot(kind="hist",data=penguins,x="body_mass_g",height=4,col="species",hue="sex",row="island")
Out[155]:
<seaborn.axisgrid.FacetGrid at 0x28860f8c190>
In [152]:
sns.displot(kind="hist",data=penguins,x="body_mass_g",height=4,col="species",hue="sex",multiple="dodge")
Out[152]:
<seaborn.axisgrid.FacetGrid at 0x2885de1f0a0>
In [153]:
sns.displot(kind="hist",data=penguins,x="body_mass_g",height=4,col="species",hue="sex",element="step")
Out[153]:
<seaborn.axisgrid.FacetGrid at 0x288359a3010>
In [160]:
sns.displot(data=tips,kind="kde",x="total_bill",y="tip",rug=True)
Out[160]:
<seaborn.axisgrid.FacetGrid at 0x2883595b070>
In [6]:
penguins
Out[6]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
3 Adelie Torgersen NaN NaN NaN NaN NaN
4 Adelie Torgersen 36.7 19.3 193.0 3450.0 Female
... ... ... ... ... ... ... ...
339 Gentoo Biscoe NaN NaN NaN NaN NaN
340 Gentoo Biscoe 46.8 14.3 215.0 4850.0 Female
341 Gentoo Biscoe 50.4 15.7 222.0 5750.0 Male
342 Gentoo Biscoe 45.2 14.8 212.0 5200.0 Female
343 Gentoo Biscoe 49.9 16.1 213.0 5400.0 Male

344 rows × 7 columns

In [7]:
sns.countplot(data=penguins,x="species")
Out[7]:
<Axes: xlabel='species', ylabel='count'>
In [10]:
penguins["species"].value_counts().plot(kind='bar',color=["red","yellow","magenta"])
Out[10]:
<Axes: >
In [11]:
sns.countplot(data=penguins,x="species",hue="sex")
Out[11]:
<Axes: xlabel='species', ylabel='count'>
In [55]:
titanic = pd.read_csv("titanic.csv")
In [56]:
sns.countplot(data=titanic,y="pclass",hue="sex")
Out[56]:
<Axes: xlabel='count', ylabel='pclass'>

Stripplots and swarmplots¶

In [15]:
trips_df = trips.dropna()
In [39]:
my_palette = ["red","green","magenta","yellow"]
sns.catplot(kind="strip",data=trips_df,x="pickup_borough",y="distance",palette=my_palette,height=4,aspect=2.5)
C:\Users\SUDIPT PC\AppData\Local\Temp\ipykernel_12580\2751856868.py:2: FutureWarning: Passing `palette` without assigning `hue` is deprecated.
  sns.catplot(kind="strip",data=trips_df,x="pickup_borough",y="distance",palette=my_palette,height=4,aspect=2.5)
Out[39]:
<seaborn.axisgrid.FacetGrid at 0x143362a0040>
In [3]:
trips = sns.load_dataset("taxis",parse_dates=["pickup","dropoff"])
In [10]:
trips_sample = trips.nlargest(600,"total").dropna()
In [26]:
trips_sample
Out[26]:
pickup dropoff passengers distance fare tip tolls total color payment pickup_zone dropoff_zone pickup_borough dropoff_borough
5364 2019-03-17 16:59:17 2019-03-17 18:04:08 2 36.70 150.00 0.00 24.02 174.82 yellow cash JFK Airport JFK Airport Queens Queens
4218 2019-03-29 17:32:20 2019-03-29 18:53:52 1 26.92 75.50 23.19 0.00 100.49 yellow credit card JFK Airport Cobble Hill Queens Brooklyn
5567 2019-03-07 00:28:57 2019-03-07 02:02:55 1 25.51 93.50 0.00 0.00 94.80 green credit card Sunset Park West Saint Albans Brooklyn Queens
5827 2019-03-05 10:34:36 2019-03-05 11:44:01 1 20.64 86.14 0.00 5.76 92.40 green credit card Coney Island Upper East Side North Brooklyn Manhattan
2387 2019-03-28 15:58:52 2019-03-28 15:59:25 1 1.80 69.06 20.80 0.00 90.16 yellow credit card JFK Airport JFK Airport Queens Queens
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2186 2019-03-05 23:20:47 2019-03-05 23:43:48 3 7.84 25.00 0.00 5.76 34.56 yellow cash Jackson Heights Clinton East Queens Manhattan
2686 2019-03-11 10:42:37 2019-03-11 11:11:12 1 7.30 25.50 5.76 0.00 34.56 yellow credit card Upper West Side South Financial District North Manhattan Manhattan
3139 2019-03-17 12:53:40 2019-03-17 13:35:57 1 3.44 25.50 5.76 0.00 34.56 yellow credit card Garment District Upper East Side North Manhattan Manhattan
3857 2019-03-30 08:29:43 2019-03-30 08:44:54 1 7.68 23.00 5.00 5.76 34.56 yellow credit card East Harlem South LaGuardia Airport Manhattan Queens
5425 2019-03-28 13:03:43 2019-03-28 13:32:27 1 7.40 25.50 5.76 0.00 34.56 yellow credit card Battery Park City Midtown East Manhattan Manhattan

577 rows × 14 columns

In [37]:
colors = ['orange','red','green','purple']
sns.catplot(data=trips_sample,kind="swarm",x="pickup_borough",y="total",palette=colors,height=5,aspect=2)
C:\Users\SUDIPT PC\AppData\Local\Temp\ipykernel_12580\4274078373.py:2: FutureWarning: Passing `palette` without assigning `hue` is deprecated.
  sns.catplot(data=trips_sample,kind="swarm",x="pickup_borough",y="total",palette=colors,height=5,aspect=2)
Out[37]:
<seaborn.axisgrid.FacetGrid at 0x143366e1ff0>
C:\Users\SUDIPT PC\anaconda3\lib\site-packages\seaborn\categorical.py:3544: UserWarning: 6.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)

Boxplots¶

In [66]:
titanic
Out[66]:
pclass survived name sex age sibsp parch ticket fare cabin embarked boat body home.dest
0 1 1 Allen, Miss. Elisabeth Walton female 29 0 0 24160 211.3375 B5 S 2 ? St Louis, MO
1 1 1 Allison, Master. Hudson Trevor male 0.9167 1 2 113781 151.55 C22 C26 S 11 ? Montreal, PQ / Chesterville, ON
2 1 0 Allison, Miss. Helen Loraine female 2 1 2 113781 151.55 C22 C26 S ? ? Montreal, PQ / Chesterville, ON
3 1 0 Allison, Mr. Hudson Joshua Creighton male 30 1 2 113781 151.55 C22 C26 S ? 135 Montreal, PQ / Chesterville, ON
4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25 1 2 113781 151.55 C22 C26 S ? ? Montreal, PQ / Chesterville, ON
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1304 3 0 Zabour, Miss. Hileni female 14.5 1 0 2665 14.4542 ? C ? 328 ?
1305 3 0 Zabour, Miss. Thamine female ? 1 0 2665 14.4542 ? C ? ? ?
1306 3 0 Zakarian, Mr. Mapriededer male 26.5 0 0 2656 7.225 ? C ? 304 ?
1307 3 0 Zakarian, Mr. Ortin male 27 0 0 2670 7.225 ? C ? ? ?
1308 3 0 Zimmerman, Mr. Leo male 29 0 0 315082 7.875 ? S ? ? ?

1309 rows × 14 columns

In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: